#include <cctype>
#include <fstream>
#include <string>
#include "LexicalAnalyzer.h"

// Extracts one character from the input stream and returns it as delivered
// by the stream's get().
//
// Bookkeeping performed for every call:
//   - the value read is appended to `str`,
//   - m_current_index is advanced by one,
//   - m_line is advanced when the value read is a newline.
//
// No end-of-input check is made here: whatever get() returns is recorded,
// counted and handed back to the caller unchanged.
int lexical_analyzer::get_char()
{
	const int next = m_pstream->get();

	str += next;
	++m_current_index;

	if (next == '\n')
	{
		++m_line;
	}

	return next;
}

// Returns `ch` to the input stream and reverts the position bookkeeping done
// by get_char(): m_current_index is decremented, and m_line is decremented
// when `ch` is a newline. The copy of the character that get_char() appended
// to `str` is intentionally left in place (the original trimming code was
// disabled).
//
// ch - the character most recently obtained from get_char().
void lexical_analyzer::putback(char ch)
{
	if (ch == '\n')
		--m_line;
	--m_current_index;

	// Callers routinely "put back" the sentinel they received when get_char()
	// hit end of input. In that situation the stream is already in a failed
	// state, so istream::putback() cannot succeed; since C++11 its only
	// effect would be to clear eofbit, which makes later eof() checks miss
	// the end of the input. Leave the stream state untouched instead.
	if (m_pstream->fail())
		return;

	m_pstream->putback(ch);
}

// Skips a run of whitespace characters at the current input position.
//
// Returns true when at least one whitespace character was skipped, false
// when the next character is not whitespace (nothing is consumed then).
bool lexical_analyzer::filter_space()
{
	bool skipped = false;

	// The <cctype> classifiers require a value representable as unsigned
	// char; passing a plain (possibly negative) char is undefined behaviour
	// for bytes >= 0x80, hence the cast.
	while (isspace(static_cast<unsigned char>(peek_char())))
	{
		skip_char();
		skipped = true;
	}

	return skipped;
}

bool lexical_analyzer::filter_comment()
{
	if (peek_char() == '/')
	{
		skip_char();
		char ch = get_char();
		if (ch == '/')
		{
			while (peek_char() != '\n')
			{
				skip_char();
			}
		}
		else if(ch == '*')
		{
			for (;;)
			{
				if (get_char() == '*' && get_char() == '/')
					break;
			}
		}
		else
		{
			putback('/');
			return false;
		}
		return true;
	}
	else
	{
		return false;
	}
}

// Reports whether `str` exactly matches one of the reserved words in the
// table below (case-sensitive, whole-string comparison).
//
// str - the identifier text to look up.
// Returns true on a match, false otherwise.
bool lexical_analyzer::is_keyword(const std::string& str)
{
	static const char* const keywords[]=
	{
		"asm",      "auto",         "bad_cast",     "bad_typeid", 
		"bool",     "break",        "case",         "catch", 
		"char",     "class",        "const",        "const_cast", 
		"continue", "default",      "delete",       "do", 
		"double",   "dynamic_cast", "else",         "enum", 
		"except",   "explicit",     "extern",       "false", 
		"finally",  "float",        "for",          "friend", 
		"goto",     "if",           "inline",       "int", 
		"long",     "mutable",      "namespace",    "new", 
		"operator", "private",      "protected",    "public", 
		"register", "reinterpret_cast",     "return",   "short", 
		"signed",   "sizeof",       "static",       "static_cast", 
		"struct",   "switch",       "template",     "this", 
		"throw",    "true",         "try",          "typedef",
		"typeid",   "typename",     "union",        "unsigned",
		"using",    "virtual",      "void",         "volatile", 
		"while",  
	};

	// Walk the table front to back and stop at the first exact match.
	const char* const* const table_end = keywords + sizeof(keywords) / sizeof(keywords[0]);
	for (const char* const* entry = keywords; entry != table_end; ++entry)
	{
		if (str == *entry)
		{
			return true;
		}
	}

	return false;
}

std::string lexical_analyzer::get_string(char delimiter)
{
	std::string result;
	result += delimiter;
	for (;;)
	{
		char ch = get_char();
		if (ch == delimiter)
		{
			result += delimiter;
			break;
		}
		else if (ch == '\\')
		{
			ch = get_char();
			--m_current_index;
			switch(ch)
			{
			case '\"':
				ch = '\"';
				break;
			case '\'':
				ch = '\'';
				break;
			case 'r':
				ch = '\r';
				break;

			case 'n':
				ch = '\n';
				break;
			case 'v':
				ch = '\v';
				break;
			case 't':
				ch = '\t';
				break;
			case 'a':
				ch = '\a';
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case '\r':                          // line splice
			case '\n':
				continue;
				break;
			default:
				break;
			}

			if (ch == 'x' || ch == 'X')
			{
				std::string s = get_hex_string();
				int x = 0;
				for (int i = 0; i < s.length(); i++)
				{
					x *= 16;
					if(s[i] >= 'A' && s[i] <= 'F')
						x += s[i] - 'A' + 10;
					else if(s[i] >= 'a' && s[i] <= 'f')
						x += s[i] - 'a' + 10;
					else
						x += s[i] - '0';
				}
				ch = (char)x;
			}

		}

		result += ch;
	}

	return result;
}

// Reads a run of decimal digits from the current input position.
//
// Returns the digits read (possibly empty). The first non-digit character
// is handed back via putback() so the caller sees it next.
std::string lexical_analyzer::get_digital_string()
{
	std::string result;

	char ch = static_cast<char>(get_char());
	// <cctype> classifiers need an unsigned-char value; a plain negative
	// char (byte >= 0x80) would be undefined behaviour.
	while (isdigit(static_cast<unsigned char>(ch)))
	{
		result += ch;
		ch = static_cast<char>(get_char());
	}
	putback(ch);

	return result;
}

// Reads a run of hexadecimal digits (0-9, a-f, A-F) from the current input
// position.
//
// Returns the digits read (possibly empty). The first non-hex character is
// handed back via putback() so the caller sees it next.
std::string lexical_analyzer::get_hex_string()
{
	std::string result;

	char ch = static_cast<char>(get_char());
	// <cctype> classifiers need an unsigned-char value; a plain negative
	// char (byte >= 0x80) would be undefined behaviour.
	while (isxdigit(static_cast<unsigned char>(ch)))
	{
		result += ch;
		ch = static_cast<char>(get_char());
	}
	putback(ch);

	return result;
}

// Returns true when `c` may begin an identifier: a letter, '_' or '$'.
// `c` must be a value returned by get_char() (a character as unsigned char,
// or the end-of-input value), which is the range <cctype> accepts.
static bool is_identifier_start(int c)
{
	return c == '_' || c == '$' || isalpha(c) != 0;
}

// Returns true when `c` may continue an identifier: anything that may begin
// one, plus decimal digits. Same argument contract as is_identifier_start().
static bool is_identifier_part(int c)
{
	return is_identifier_start(c) || isdigit(c) != 0;
}

// Extracts the next token from the input stream.
//
// Leading whitespace and comments are skipped first. The token text is then
// stored in t.value and its kind in t.category:
//   - identifiers ([A-Za-z_$][A-Za-z0-9_$]*), re-tagged as token_keyword
//     when is_keyword() matches,
//   - numbers (a digit run, or a digit followed by x/X and hex digits),
//   - string and character literals (see get_string()),
//   - operators, including the multi-character forms recognised below,
//   - punctuators: ; { } , #
// A backslash immediately followed by a line break is a line splice: it is
// dropped and the token after it is returned instead.
//
// t - receives the token; its value is reset before lexing.
//
// Returns status_eof when the input is exhausted, status_invalid_char (with
// t.category == token_error and t.value holding the offending character)
// for a character that cannot start a token, status_success otherwise.
lexical_status lexical_analyzer::get_token(token& t)
{
	if (m_pstream->eof())
		return status_eof;

	// One loop is enough: it only ends once neither filter finds anything
	// left to skip.
	while (filter_space() || filter_comment())
	{
	}

	if (m_pstream->eof())
		return status_eof;

	t.value.resize(0);

	// Keep the raw int so end of input can be told apart from a real
	// character even when the eof flag was not set beforehand.
	int next = get_char();
	if (next == std::char_traits<char>::eof())
		return status_eof;

	char ch = static_cast<char>(next);
	if (is_identifier_start(next))
	{
		t.category = token_identifier;
		do
		{
			t.value += static_cast<char>(next);
			next = get_char();
		} while (is_identifier_part(next));
		putback(static_cast<char>(next));
	}
	else if (isdigit(next))
	{
		t.category = token_number;
		t.value += ch;
		next = get_char();
		if (next == 'x' || next == 'X')
		{
			t.value += static_cast<char>(next);
			t.value += get_hex_string();
		}
		else if (isdigit(next))
		{
			t.value += static_cast<char>(next);
			t.value += get_digital_string();
		}
		else
		{
			putback(static_cast<char>(next));
		}
	}
	else if (ch == '\"')
	{
		t.category = token_string;
		t.value = get_string('\"');
	}
	else if (ch == '\'')
	{
		t.category = token_char;
		t.value = get_string('\'');
	}
	else
	{
		t.category = token_operator;
		if (ch == '=' || ch == '&' || ch == '|' || ch == ':')
		{
			t.value = ch;
			if (peek_char() == ch)                      // == && || ::
			{
				t.value += ch;
				skip_char();
			}
		}
		else if (ch == '+' || ch == '-')
		{
			t.value = ch;
			char cc = get_char();
			if (cc == ch)                               // ++ --
			{
				t.value += ch;
			}
			else if (cc == '=')                         // += -=
			{
				t.value += '=';
			}
			else if (ch == '-' && cc == '>')
			{
				t.value += '>';                         // ->
				cc = peek_char();
				if (cc == '*')
				{
					skip_char();

					t.value += '*';                     // ->*
				}
			}
			else
			{
				putback(cc);
			}
		}
		else if (ch == '*' || ch == '/' || ch == '%' || ch == '^' || ch == '!')
		{
			t.value = ch;
			ch = peek_char();
			if (ch == '=')                              // *= /= %= ^= !=
			{
				t.value += '=';
				skip_char();
			}
		}
		else if (ch == '<' || ch == '>')
		{
			t.value = ch;
			char cc = get_char();
			if (ch == cc)                               // << >>
			{
				t.value += cc;
				cc = peek_char();
				if (cc == '=')                          // <<= >>=
				{
					skip_char();
					t.value += '=';
				}
			}
			else if (cc == '=')                         // <= >=
			{
				t.value += '=';
			}
			else
			{
				putback(cc);
			}
		}
		else if (ch == '.')
		{
			t.value = '.';                              // .
			ch = get_char();
			if (ch == '*')
			{
				t.value += '*';                         // .*
			}
			else if (ch == '.')
			{
				char cc = get_char();
				if (cc == '.')                          // ...
				{
					t.value += "..";
				}
				else
				{
					// Just ".." is not a token: hand both characters back
					// (most recent first) and return the single '.'.
					putback(cc);
					putback(ch);
				}
			}
			else
			{
				putback(ch);
			}
		}
		else if (ch == '~' || ch == '?' || ch == '[' || ch == ']' || ch == '(' || ch == ')')
		{
			t.value = ch;
		}
		else if (ch == ';' || ch == '{' || ch == '}' || ch == ',' || ch == '#')
		{
			t.category = token_punctuator;
			t.value = ch;
		}
		else if (ch == '\\')
		{
			ch = peek_char();
			if (ch == '\r' || ch == '\n')
			{
				// Line splice: drop the line break ("\r\n" counts as one)
				// and return the token that follows, rather than handing
				// the caller an empty operator token.
				skip_char();
				if (ch == '\r' && peek_char() == '\n')
					skip_char();
				return get_token(t);
			}

			// A stray backslash is reported like any other invalid
			// character: the offending character itself, with the error
			// status.
			t.category = token_error;
			t.value = '\\';
			return status_invalid_char;
		}
		else
		{
			t.category = token_error;
			t.value = ch;
			return status_invalid_char;
		}
	}

	if (t.category == token_identifier && is_keyword(t.value))
	{
		t.category = token_keyword;
	}

	return status_success;
}